We simulate a random sample of 1000 Normal\((\mu=0, \sigma=5)\) IID random variables.
# Draw an IID sample of n = 1000 observations from Normal(mu = 0, sigma = 5)
n <- 1000
x <- rnorm(n, mean = 0, sd = 5)
We simulate a random sample of 1000 Normal\((\mu=0, \sigma=5)\) IID random variables.
# Simulate n = 1000 IID Normal(mu = 0, sigma = 5) random variables
n <- 1000
x <- rnorm(n, mean = 0, sd = 5)
The log-likelihood function can be computed with the following function:
l <- function(x, mu, sigma){
  # Normal log-likelihood of (mu, sigma) given data x:
  # the sum of the per-observation log densities.
  log_dens <- dnorm(x, mean = mu, sd = sigma, log = TRUE)
  sum(log_dens)
}
We plot the log-likelihood function for \((\mu, \sigma)\) given \(\vec{x}\), saving the values in l_surface (code hidden).
# Interactive 3D surface of the log-likelihood over the (mu, sigma) grid
library(plotly)
plot_ly(z = ~l_surface) %>% add_surface(x = sigma, y = mu)
We compute the MLE’s \((\widehat{\mu}_{\text{MLE}}, \widehat{\sigma}_{\text{MLE}})\) for \((\mu, \sigma)\):
# MLEs for (mu, sigma): the sample mean, and the square root of the
# 1/n (not 1/(n-1)) sum of squared deviations.
xbar <- sum(x)/n
sigma_hat <- sqrt((1 / n) * sum((x - xbar) ^ 2))
c(xbar, sigma_hat)
## [1] -0.2091463 4.9580846
The MLE \((\widehat{\mu}_{\text{MLE}}, \widehat{\sigma}_{\text{MLE}})\) are the parameter values that best explain/fit the observed data x.
A group of students records the production serial numbers of their iPhones as follows:
## [1] 8666 49413 28421 55305 45695 29439
Using common-sense, use this data to come up with an estimate of the total number of phones produced by Apple in this production run.
Two paradigms for visualizing a function in a computational framework:
Both require writing functions.
# Raise a to the power b (toy example of writing a function in R)
my_exponent <- function(a, b) {
  result <- a^b
  result
}
my_exponent(3, 2)
## [1] 9
We encode the likelihood as an R function. \[ L(p) = p^{\sum_{i=1}^n x_i}(1 - p)^{n - \sum_{i=1}^n x_i} \]
# Bernoulli/binomial likelihood of p given 0/1 data x:
# L(p) = p^(# successes) * (1 - p)^(# failures). Vectorized over p.
L_binom <- function(p, x) {
  successes <- sum(x)
  failures <- length(x) - successes
  p ^ successes * (1 - p) ^ failures
}
# Observed sample: three successes out of five trials
x <- c(1, 1, 0, 1, 0)
# Likelihood at p = 0.5
L_binom(p = .5, x = x)
## [1] 0.03125
# Likelihood at p = 3/5 (the sample proportion) is larger
L_binom(3/5, x = x)
## [1] 0.03456
In this approach, you discretize the parameter into a grid of values, evaluate the likelihood at each grid point, and plot the resulting pairs.
# Discretize p into a fine grid on [0, 1] and evaluate the likelihood
# at every grid point
p_vec <- seq(from = 0, to = 1, by = .005)
head(p_vec)
## [1] 0.000 0.005 0.010 0.015 0.020 0.025
L_vec <- L_binom(p_vec, x)
head(L_vec)
## [1] 0.000000e+00 1.237531e-07 9.801000e-07 3.274509e-06 7.683200e-06
## [6] 1.485352e-05
# Pair up the grid and likelihood values, then draw the curve
df <- data.frame(x = p_vec, y = L_vec)
ggplot(df, aes(x = x, y = y)) +
  geom_line()
It’s helpful to see how the likelihood responds to increasing data.
# Replicate the sample tenfold: more data concentrates the likelihood
x_big <- rep(x, 10)
L_vec <- L_binom(p_vec, x_big)
df <- data.frame(x = p_vec, y = L_vec)
ggplot(df, aes(x = x, y = y)) +
  geom_line()
The second method doesn’t require manually discretizing the parameter values: `stat_function()` evaluates the function over the plotting range for us.
# stat_function evaluates L_binom directly over the x-range — no manual grid
ggplot(data.frame(x = c(0, 1)), aes(x)) + stat_function(fun = L_binom, args = list(x = x_big))
\[ l(\mu, \sigma) = -\frac{n}{2}\log(2\pi) - n \log (\sigma) - \frac{1}{2 \sigma^2} \sum_{i = 1}^{n} \left(x_i - \mu\right) ^2 \]
# Closed-form Normal log-likelihood:
# l(mu, sigma) = -(n/2) log(2*pi) - n log(sigma) - sum((x - mu)^2) / (2 sigma^2)
l_norm <- function(mu, sigma, x) {
  n <- length(x)
  const_term <- -n / 2 * log(2 * pi)
  scale_term <- -n * log(sigma)
  quad_term  <- -1 / (2 * sigma^2) * sum((x - mu)^2)
  const_term + scale_term + quad_term
}
# Same log-likelihood via dnorm(log = TRUE): sum per-observation log densities
l_norm_quick <- function(mu, sigma, x) {
  log_dens <- dnorm(x, mean = mu, sd = sigma, log = TRUE)
  sum(log_dens)
}
# Sanity check: the closed-form and dnorm-based versions agree
l_norm(0, 5, x)
## [1] -12.70188
l_norm_quick(0, 5, x)
## [1] -12.70188
# Redraw x (n = 100) for the 2D surface example below
x <- rnorm(100, mean = 0, sd = 5)
In 2D, we have to define a grid of possible parameter values and compute the likelihood for each coordinate
# Define 2D domain: 500-point grids for mu and sigma
mu <- seq(-1.5, 1.5, length = 500)
sigma <- seq(4, 6, length = 500)
# Compute the log-likelihood at every (mu[i], sigma[j]) grid point.
# seq_len() is safer than 1:nrow(...) (1:0 iterates when the extent is 0).
l_surface <- matrix(0, nrow = length(mu), ncol = length(sigma))
for (i in seq_len(nrow(l_surface))) {
  for (j in seq_len(ncol(l_surface))) {
    l_surface[i, j] <- l_norm(mu = mu[i], sigma = sigma[j], x)
  }
}
# Render the log-likelihood surface interactively
library(plotly)
plot_ly(z = ~l_surface) %>% add_surface(x = sigma, y = mu)
We compute the MLE’s \((\widehat{\mu}_{\text{MLE}}, \widehat{\sigma}_{\text{MLE}})\) for \((\mu, \sigma)\):
# MLEs for (mu, sigma): sample mean and root of the 1/n sum of squares
n <- length(x)
xbar <- sum(x)/n
sigma_hat <- sqrt((1 / n) * sum((x - xbar) ^ 2))
c(xbar, sigma_hat)
## [1] -0.2574092 4.5197462
# Reproducible sample: 3 draws (without replacement) from {1, ..., 20}
set.seed(301)
x <- sample(1:20, 3)
# Uniform(0, beta) likelihood of beta given data x.
# L(beta) = (1/beta)^n when beta >= max(x), and 0 otherwise: any beta
# below the sample maximum assigns zero density to an observed point.
# The original omitted the indicator, making L positive on (0, max(x)).
# Vectorized over beta so it works with stat_function().
L_unif <- function(beta, x) {
  n <- length(x)
  ifelse(beta >= max(x), (1 / beta) ^ n, 0)
}
# Likelihood of beta over the full [0, 20] range
ggplot(data.frame(x = c(0, 20)), aes(x)) + stat_function(fun = L_unif, args = list(x = x))
# Same plot restricted to beta >= max(x), where the likelihood is supported
ggplot(data.frame(x = c(0, 20)), aes(x)) + stat_function(fun = L_unif, args = list(x = x))+ xlim(c(max(x), 20))
We simulate a random sample of 20 Unif\((\alpha=0, \beta=30)\) IID uniform variables.
# Method-of-moments estimate of beta: 2 * xbar, since E[X] = beta / 2
n <- 20
x <- runif(n, min = 0, max = 30)
2 * mean(x)
## [1] 37.04091
We simulate many random samples of 20 Unif\((\alpha=0, \beta=30)\) IID random variables.
# Sampling distribution of the MOM estimator via repeated simulation
it <- 1000
n <- 20
mom <- rep(NA, it)
for (i in seq_len(it)) {
  x <- runif(n, min = 0, max = 30)
  mom[i] <- 2 * mean(x)
}
df <- data.frame(mom)
mean_mom <- mean(mom)
library(tidyverse)
ggplot(df, aes(x = mom)) +
  geom_density(lwd = 1.2) +
  geom_vline(xintercept = mean_mom, col = "steelblue", lty = 2, lwd = 1.2)
We also consider the MLE estimate.
# Compare sampling distributions of MOM (2 * xbar) and the naive MLE (max)
it <- 1000
n <- 20
mom <- rep(NA, it)
mle <- rep(NA, it)
for (i in seq_len(it)) {
  x <- runif(n, min = 0, max = 30)
  mom[i] <- 2 * mean(x)
  mle[i] <- max(x)
}
# Long format: one row per estimate, labeled by estimator
df <- data.frame(
  stat = c(mom, mle),
  estimator = rep(c("MOM", "MLE"), each = it)
)
mean_mom <- mean(mom)
mean_mle <- mean(mle)
ggplot(df, aes(x = stat, fill = estimator)) +
  geom_density(alpha = .3) +
  geom_vline(xintercept = mean_mom, col = "steelblue", lty = 2, lwd = 1.2) +
  geom_vline(xintercept = mean_mle, col = "tomato", lty = 2, lwd = 1.2)
# Repeat with the bias-corrected MLE, ((n + 1) / n) * max(x)
it <- 1000
n <- 20
mom <- rep(NA, it)
mle <- rep(NA, it)
for (i in seq_len(it)) {
  x <- runif(n, min = 0, max = 30)
  mom[i] <- 2 * mean(x)
  mle[i] <- ((n + 1) / n) * max(x)
}
# Long format for side-by-side density plots
df <- data.frame(
  stat = c(mom, mle),
  estimator = rep(c("MOM", "MLE"), each = it)
)
mean_mom <- mean(mom)
mean_mle <- mean(mle)
ggplot(df, aes(x = stat, fill = estimator)) +
  geom_density(alpha = .3) +
  geom_vline(xintercept = mean_mom, col = "steelblue", lty = 2, lwd = 1.2) +
  geom_vline(xintercept = mean_mle, col = "tomato", lty = 2, lwd = 1.2)
# The MLE-based estimator has far smaller variance than MOM
df %>% group_by(estimator) %>% summarize(var = var(stat))
## # A tibble: 2 x 2 ## estimator var ## <fct> <dbl> ## 1 MLE 2.05 ## 2 MOM 14.0
We demonstrate the consistency of the bias-corrected MLE
\[ \hat{\beta}_{MLE} = \frac{n+1}{n} x_{max} \]
# Consistency: the corrected MLE concentrates around beta = 30 as n grows
it <- 1000
n_vec <- c(20, 40, 80, 120, 200)
nsamps <- length(n_vec)
mle <- rep(NA, it * nsamps)
for (j in seq_len(nsamps)) {
  n <- n_vec[j]
  for (i in seq_len(it)) {
    x <- runif(n, min = 0, max = 30)
    mle[(j - 1) * it + i] <- ((n + 1) / n) * max(x)
  }
}
# Each sample size contributes `it` consecutive estimates
df <- data.frame(stat = mle,
                 n = rep(n_vec, each = it))
ggplot(df, aes(x = n, y = stat)) +
  geom_point() +
  geom_hline(yintercept = 30 - .3, lty = 3) +
  geom_hline(yintercept = 30 + .3, lty = 3)